# -*- coding: utf-8 -*-

# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: https://doc.scrapy.org/en/latest/topics/item-pipeline.html

from scrapy.pipelines.images import ImagesPipeline
from scrapy.exporters import JsonItemExporter
from twisted.enterprise import adbapi
import MySQLdb
import MySQLdb.cursors
import codecs
import json


class ArticlespiderPipeline(object):
    """Default no-op pipeline: every item is handed on unchanged."""

    def process_item(self, item, spider):
        """Return ``item`` untouched; ``spider`` is not used."""
        return item


class JsonWithEncodingPipeline(object):
    """Pipeline that stores every item as one JSON line in ``article.json``.

    The file is opened (and truncated) in the current working directory when
    the pipeline is instantiated, and is written as UTF-8.
    """

    def __init__(self):
        self.file = codecs.open('article.json', 'w', encoding='utf-8')

    def process_item(self, item, spider):
        """Write ``item`` as a single JSON line and return it unchanged.

        ``ensure_ascii=False`` keeps non-ASCII text as-is in the output
        instead of emitting ``\\uXXXX`` escapes.
        """
        lines = json.dumps(dict(item), ensure_ascii=False) + "\n"
        self.file.write(lines)
        return item

    def close_spider(self, spider):
        """Close the output file when the spider finishes.

        ``close_spider`` is the hook Scrapy calls on item pipelines. The
        previous ``spider_closed`` name was never invoked automatically, so
        the file was left open until interpreter shutdown.
        """
        self.file.close()

    def spider_closed(self, spider=None):
        """Backward-compatible alias for :meth:`close_spider`."""
        self.close_spider(spider)


class MysqlPipeline(object):
    """Pipeline that writes each item to MySQL synchronously.

    Every item triggers a blocking ``execute`` followed by ``commit`` on a
    single connection that is opened when the pipeline is instantiated.
    """

    def __init__(self):
        # Keyword arguments make the meaning of each value explicit; the
        # values themselves are unchanged.
        self.connet = MySQLdb.connect(
            host='127.0.0.1',
            user='root',
            passwd='root',
            db='article_spider',
            charset='utf8',
            use_unicode=True,
        )
        # All statements are executed through the cursor.
        self.cursor = self.connet.cursor()

    def process_item(self, item, spider):
        """Insert ``item`` into ``jobbole_article``, commit, and return it.

        The item is returned so that pipelines that run after this one still
        receive it (previously ``None`` was passed down the chain).
        """
        insert_sql = """
            INSERT INTO `article_spider`.`jobbole_article`
            (`title`, `create_date`, `url`, `url_object_id`, `fav_nums`)
            VALUES (%s, %s, %s, %s, %s)
        """
        params = (
            item['title'],
            item['createDate'],
            item['url'],
            item['url_object_id'],
            item['fav_nums'],
        )
        self.cursor.execute(insert_sql, params)
        self.connet.commit()
        return item

    def close_spider(self, spider):
        """Release the cursor and the connection when the spider finishes."""
        self.cursor.close()
        self.connet.close()


class MysqlTwistedPipeline(object):
    """Pipeline that stores items in MySQL through Twisted's connection pool.

    Inserts are scheduled with ``dbpool.runInteraction`` so they do not block
    item processing.
    """

    def __init__(self, dbpool):
        self.dbpool = dbpool

    @classmethod
    def from_settings(cls, settings):
        """Build the pipeline from the ``MYSQL_*`` entries in ``settings``."""
        dbparams = dict(
            host=settings['MYSQL_HOST'],
            db=settings['MYSQL_DBNAME'],
            user=settings['MYSQL_USER'],
            password=settings['MYSQL_PASSWORD'],
            charset='utf8',
            cursorclass=MySQLdb.cursors.DictCursor,
            use_unicode=True,
        )
        dbpool = adbapi.ConnectionPool('MySQLdb', **dbparams)
        return cls(dbpool)

    def process_item(self, item, spider):
        """Schedule the insert for ``item`` and return the item.

        The item is returned right away so that pipelines that run after this
        one still receive it (previously ``None`` was passed down the chain).
        Insert failures are reported through :meth:`hander_error`.
        """
        # Hand the insert to the pool so it runs asynchronously.
        query = self.dbpool.runInteraction(self.do_insert, item)
        # Report insert errors through the errback.
        query.addErrback(self.hander_error)
        return item

    def do_insert(self, cursor, item):
        """Run the actual INSERT using the cursor supplied by the pool."""
        insert_sql = """
            INSERT INTO `article_spider`.`jobbole_article`
            (`title`, `create_date`, `url`, `url_object_id`, `fav_nums`)
            VALUES (%s, %s, %s, %s, %s)
        """
        params = (
            item['title'],
            item['createDate'],
            item['url'],
            item['url_object_id'],
            item['fav_nums'],
        )
        cursor.execute(insert_sql, params)

    def hander_error(self, failure):
        """Print the failure raised by an asynchronous insert."""
        print(failure)

    def close_spider(self, spider):
        """Shut down the connection pool when the spider finishes."""
        self.dbpool.close()


class ArticleSpiderImagePipeline(ImagesPipeline):
    """Images pipeline that records the local path of the downloaded image."""

    def item_completed(self, results, item, info):
        """Store the downloaded image's path in ``item['front_image_path']``.

        ``results`` is iterated as ``(ok, value)`` pairs. Only successful
        entries are read, because a failed download does not carry a
        ``'path'`` entry. If several downloads succeeded, the last path wins.
        If none succeeded (or ``results`` is empty), the item is returned
        without ``front_image_path`` being set instead of raising.
        """
        if 'front_image_url' in item:
            image_file_path = None
            for ok, value in results:
                if ok:
                    image_file_path = value['path']
            if image_file_path is not None:
                item['front_image_path'] = image_file_path
        return item
